#libraries
library(readr)
library(tidyverse)
library(dplyr)
library(ggmap)
library(ggplot2)
library(ggpubr)
arrests <- read_csv("https://raw.githubusercontent.com/paulmj7/STOR320_Group0_Project/master/arrests.csv")
head(arrests)
## # A tibble: 6 x 22
##      X1    id `Primary Charge` Street Street1 Street2 City  State Zipcode
##   <dbl> <dbl> <chr>            <chr>  <chr>   <chr>   <chr> <chr>   <dbl>
## 1     1 40515 ASSAULT-SIMPLE   117 O~ OLD DU~ <NA>    CHAP~ NC      27517
## 2     2 40498 FAIL TO APPEAR/~ 214 W~ WEST R~ <NA>    CHAP~ NC      27516
## 3     3 40496 PUBLIC URINATION 107 E~ EPHESU~ LEGION~ CHAP~ NC      27517
## 4     4 40497 AFFRAY/ASSAULT ~ 117 O~ OLD DU~ <NA>    CHAP~ NC      27517
## 5     5 40495 ASSAULT-SIMPLE   233 M~ MCCAUL~ <NA>    CHAP~ NC      27514
## 6     6 40492 POSS MARIJUANA ~ 143 W~ WEST F~ CHURCH~ CHAP~ NC      27516
## # ... with 13 more variables: `Date of Arrest` <date>, `Time of Arrest` <time>,
## #   Age <dbl>, Race <chr>, Gender <chr>, Ethnicity <chr>, `Type of
## #   Arrest` <chr>, `Drugs or Alcohol Present` <chr>, `Weapon Present` <chr>,
## #   Disposition <chr>, latitude_longitude <chr>, latitude <dbl>,
## #   longitude <dbl>
#cleaning code to sort arrests into latlong group and save grouping lines
arrests <- filter(arrests, longitude < -78.95 & longitude >-79.11 & latitude > 35.85 & latitude < 35.98)

arrests <- within(arrests, {
  grp.lat = cut(latitude, 10, labels = FALSE)
  grp.long = cut(longitude, 10, labels = FALSE)
})

vec_group_long <- vector()
for (i in 1:10) {
  vec_group_long[i] <- min(arrests$longitude[arrests$grp.long==i])
}
vec_group_long[11] <- -78.95

vec_group_lat <- vector()
for (i in 1:10) {
  vec_group_lat[i] <- min(arrests$latitude[arrests$grp.lat==i])
}
vec_group_lat[11] <- 35.98

Graphs that may be useful

arrests$weekday <- weekdays(arrests$`Date of Arrest`)
arrests$weekday <- factor(arrests$weekday, level = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
x <- group_by(arrests, weekday) %>%
  summarise(count = n()) %>%
  ggplot(aes(x=weekday, y=count)) + geom_col(fill='steel blue') + theme_minimal() +
  labs(x="Weekday", y="Number of Arrests", title = "Number of Arrests by Weekday")

arrests$month <- months(arrests$'Date of Arrest')
arrests$month <- factor(arrests$month, level = c("January", "February", "March", "April", "May","June", "July", "August", "September", "October", "November", "December"))
y <- group_by(arrests, month) %>%
  summarise(count = n()) %>%
  ggplot(aes(x=month, y=count)) + geom_col(fill='steel blue') + 
  theme_minimal() + theme(axis.text.x =element_text(angle = 45)) +
  xlab("Month") + ylab("Number of Arrests") + labs(title="Number of Arrests by Month from Dataset")

figure <- ggarrange(x, y,
                    ncol = 1, nrow = 2)
figure

#survey <- read_csv("C:/Users/gabri/OneDrive/Documents/Junior Year/Spring '20/STOR 320/Project/cleaned_survey.csv") 

survey <- read_delim("https://raw.githubusercontent.com/paulmj7/STOR320_Group0_Project/master/q5-public-safety-police-services.csv", ";", escape_double = FALSE, trim_ws = TRUE)
head(survey)
## # A tibble: 6 x 13
##      id `Q5-5. Overall ~ `Q5-6. The visi~ `Q5-7. The Town~ `Q5-8. How quic~
##   <dbl> <chr>            <chr>            <chr>            <chr>           
## 1   128 Very Satisfied   Very Satisfied   Very Satisfied   Very Satisfied  
## 2   169 Very Satisfied   Very Satisfied   Very Satisfied   Very Satisfied  
## 3   243 Very Satisfied   Satisfied        Satisfied        Don't Know      
## 4   308 Very Satisfied   Very Satisfied   Very Satisfied   Very Satisfied  
## 5   338 Neutral          Neutral          Satisfied        Don't Know      
## 6   379 Very Satisfied   Very Satisfied   Very Satisfied   Very Satisfied  
## # ... with 8 more variables: `Q5-9. Enforcement of local traffic laws` <chr>,
## #   `Q5-10. Police safety education programs` <chr>, `Q5-11. Chapel Hill Police
## #   Department's overall performance` <chr>, `Q5-12. The attitude and behavior
## #   of Police Department personnel toward residents` <chr>, `Q5-13. The level
## #   of safety and security in your neighborhood` <chr>, LATITUDE <dbl>,
## #   LONGITUDE <dbl>, location <chr>
#cleaning code for survey data
survey <-rename(survey,
            Protection_Quality = 'Q5-5. Overall quality of local police protection',
            Visibility = 'Q5-6. The visibility of police in neighborhoods',
            Prevention = "Q5-7. The Town's efforts to prevent crime",
            Reponse = "Q5-8. How quickly police respond to emergencies",
            Traffic_Enforcement = "Q5-9. Enforcement of local traffic laws", 
            Education_Programs = "Q5-10. Police safety education programs",
            Overall_Performance = "Q5-11. Chapel Hill Police Department's overall performance",
            Attitude = "Q5-12. The attitude and behavior of Police Department personnel toward residents",
            Safety = "Q5-13. The level of safety and security in your neighborhood")

survey2 <- survey
for ( i in 2:10) {
 survey2[i] <- sapply(survey2[i], function(x) as.integer(factor(as.character(x), levels=c("Very Dissatisfied", "Dissatisfied", "Neutral", "Satisfied", "Very Satisfied"))))
}

#functions to use same groups created for arrest data for satisfaction data
assign_latgroup <- function (x) {
  for ( i in 1:10) {
    if (vec_group_lat[i] <= x & x < vec_group_lat[i+1]) {
      return (i)
    }
  }
  return (NA)
}

assign_longgroup <- function(x) {
  for ( i in 1:10) {
    if (vec_group_long[i] <= x & x < vec_group_long[i+1]) {
      return(i)
    }
  }
  return(NA)
}


#create groups for survey data
for (i in 1:nrow(survey2)) {
  survey2$grp.lat[i] <- assign_latgroup(survey2$LATITUDE[i])
}
for( i in 1:nrow(survey2)) {
  survey2$grp.long[i] <- assign_longgroup((survey2$LONGITUDE[i]))
}

survey <- survey2

Survey data

#map of surveys collected with latitude and longitude grouping lines
# can add facet = 'Overall_Performance' to get maps of answers
library(ggmap)
register_google(key = "AIzaSyAz_F8FVVhZbjfsz4rCz4cEigy6__5cQKc")
survey2 <- survey %>%
  mutate_at(vars(Overall_Performance), funs(factor))
qmplot(LONGITUDE, LATITUDE, data=survey2, maptype='toner-lite', color = Overall_Performance, 
   xlim= c(vec_group_long[1], vec_group_long[11]), 
   ylim= c(vec_group_lat[1], vec_group_lat[11]), f=.8) + 
  geom_hline(yintercept = vec_group_lat) +
  geom_vline(xintercept = vec_group_long) +
  ggtitle("Location of Surveys by Overall Performance Score")

#count of latlong groups for survey data
x1 <- survey %>%
  group_by(grp.lat, grp.long) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

x2 <- survey %>%
  group_by(grp.lat, grp.long) %>%
  summarise(count = n()) %>%
  ggplot(aes(x=grp.long, y=grp.lat, fill = count)) + 
    geom_tile() + 
    scale_fill_viridis_c() + 
    theme_classic() +
    xlim(.5, 10.5) +
    scale_y_continuous(breaks = seq(1, 10, by = 1)) +
    scale_x_continuous( breaks = seq(1, 10, by = 1)) +
    theme(axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          plot.title = element_text(size=8),
          legend.title = element_text(size=8),
          axis.title = element_text(size=9)) + 
    geom_vline(xintercept = seq(.5, 10.5, 1)) +
    geom_hline(yintercept = seq(.5, 10.5, 1)) + 
    ggtitle("Number of Surveys Collected by LatLong Grouping") +
    xlab("Longitude Group") +
    ylab("Latitude Group")

#Average Overall Preformance score by latlong groups
y1 <- survey %>%
  filter(!is.na(Overall_Performance)) %>%
  group_by(grp.lat, grp.long) %>%
  summarise(satisfaction = mean(Overall_Performance, na.rm=TRUE))

y2 <- survey %>%
  filter(!is.na(Overall_Performance)) %>%
  group_by(grp.lat, grp.long) %>%
  summarise(satisfaction = mean(Overall_Performance, na.rm=TRUE)) %>%
  ggplot(aes(x=grp.long, y=grp.lat, fill = satisfaction)) + 
    geom_tile() + 
    scale_fill_viridis_c() + 
    theme_classic() +
    xlim(.5, 10.5) +
    scale_y_continuous(breaks = seq(1, 10, by = 1)) +
    scale_x_continuous( breaks = seq(1, 10, by = 1)) +
    theme(axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          plot.title = element_text(size=8),
          legend.title = element_text(size=8),
          axis.title = element_text(size=9)) + 
    geom_vline(xintercept = seq(.5, 10.5, 1)) +
    geom_hline(yintercept = seq(.5, 10.5, 1)) + 
    ggtitle("Average Overall Performance score by LatLong Grouping") +
    xlab("Longitude Group") +
    ylab("Latitude Group")

# average of each person's answers to all questions, grouped by latlong group and then averaged
z1 <- survey %>%
  mutate(average_score = rowMeans(survey[, 2:10], na.rm= TRUE)) %>%
  na.omit(average_score) %>%
  group_by(grp.lat, grp.long) %>%
  summarise(Overall_average = mean(average_score))

z2 <- survey %>%
  mutate(average_score = rowMeans(survey[, 2:10], na.rm= TRUE)) %>%
  na.omit(average_score) %>%
  group_by(grp.lat, grp.long) %>%
  summarise(Overall_average = mean(average_score)) %>%
    ggplot(aes(x=grp.long, y=grp.lat, fill = Overall_average)) + 
    geom_tile() + 
    scale_fill_viridis_c() + 
    theme_classic() +
    xlim(.5, 10.5) +
    scale_y_continuous(breaks = seq(1, 10, by = 1)) +
    scale_x_continuous( breaks = seq(1, 10, by = 1)) +
    theme(axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          plot.title = element_text(size=8),
          legend.title = element_text(size=8),
          axis.title = element_text(size=9)) + 
    geom_vline(xintercept = seq(.5, 10.5, 1)) +
    geom_hline(yintercept = seq(.5, 10.5, 1)) + 
    ggtitle("Average of Survey Questions by LatLong Grouping") +
    xlab("Longitude Group") +
    ylab("Latitude Group")


full_join(x1, y1, by = c("grp.lat", "grp.long")) %>%
  full_join(z1, by = c("grp.lat", "grp.long")) %>%
  as.data.frame()
##    grp.lat grp.long count satisfaction Overall_average
## 1        3        3    27     4.000000        3.820513
## 2        7        7    27     4.346154        4.097222
## 3        9        6    24     4.260870        4.464646
## 4        6        4    22     3.818182        3.791667
## 5        8        6    22     4.000000        3.777778
## 6        4        3    20     4.050000        4.255556
## 7        8        4    19     4.222222        4.349206
## 8        9        4    19     3.833333        4.088889
## 9        8        5    18     4.214286        4.236111
## 10       6        5    17     4.000000        3.730159
## 11       9        5    17     3.875000        3.944444
## 12       5        7    15     4.333333        4.800000
## 13       5        4    13     3.769231        3.577778
## 14       5        6    13     4.181818        4.111111
## 15       8        7    13     4.375000        4.555556
## 16       6        3    12     4.200000        4.083333
## 17       6        7    12     3.818182        3.600000
## 18       7        5    12     4.166667        4.138889
## 19       6        6    11     4.090909        4.222222
## 20       7        3    10     4.000000        3.861111
## 21       7        6     9     4.142857        4.277778
## 22       4        4     8     3.857143        4.055556
## 23       9        3     7     4.142857        3.955556
## 24       4        5     6     4.333333        4.333333
## 25       4        7     5     3.400000        3.833333
## 26       7        4     5     3.250000        3.814815
## 27       6        8     4     4.000000        4.555556
## 28       8        3     4     4.250000        4.000000
## 29      10        6     4     4.500000        4.055556
## 30       5        5     3     4.500000        3.777778
## 31       9        7     3     2.500000        2.722222
## 32      10        4     3     5.000000        4.444444
## 33       3        5     2     4.000000        5.000000
## 34       5        3     2     4.000000        5.000000
## 35      10        3     2     4.000000        4.000000
## 36       7        8     1     5.000000        4.888889
figure <- ggarrange(x2, y2, z2,
                   nrow = 2, ncol = 2)
figure

#grouped average score for answered questions
survey_satisfaction <- survey %>%
  mutate(average_score = rowMeans(survey[, 2:10], na.rm= TRUE)) %>%
  na.omit(average_score) %>%
  group_by(grp.lat, grp.long) %>%
  summarise(Overall_average = mean(average_score),
            Surveys = n())

#grouped number of arrests by latlong
arrests2 <- arrests %>%
  group_by(grp.lat, grp.long) %>%
  summarise(num_arrests = n())

#combined grouped latlongs with number of arrests and average survey scores
combined <- left_join(survey_satisfaction, arrests2, by = c("grp.lat", "grp.long")) %>%
  mutate_at(vars(num_arrests),~replace(., is.na(.), 0) )

combined
## # A tibble: 36 x 5
## # Groups:   grp.lat [8]
##    grp.lat grp.long Overall_average Surveys num_arrests
##      <int>    <int>           <dbl>   <int>       <dbl>
##  1       3        3            3.82      13          99
##  2       3        5            5          1           4
##  3       4        3            4.26      10         209
##  4       4        4            4.06       4         224
##  5       4        5            4.33       4          85
##  6       4        7            3.83       2          49
##  7       5        3            5          1         718
##  8       5        4            3.58       5        5546
##  9       5        5            3.78       1          91
## 10       5        6            4.11       5         232
## # ... with 26 more rows
ggplot(combined, aes(x=Overall_average, y=num_arrests)) + 
  geom_point(color='steel blue') +
  ggtitle("Average Score by Number of Arrests in LatLong Group") +
  ylab("Number of Arrests in LatLong")+
  xlab("Average Police Score in LatLong") + 
  theme_minimal()

#filter(combined, !(grp.lat == 9 & grp.long == 7)) %>%
 # ggplot(aes(x=Overall_average, y=num_arrests)) + 
  #geom_point(color='steel blue') +
  #ggtitle("Average Score by Number of Arrests in LatLong Group") +
  #ylab("Number of Arrests in LatLong")+
  #xlab("Average Police Score in LatLong") + 
  #theme_minimal() 

Arrest Data

qmplot(longitude, latitude, data=arrests, geom = "blank", 
  zoom = 13, maptype = "toner-background",legend = "topright") +
  stat_density_2d(aes(fill = ..level..), size=0.01, bins=80,  geom = "polygon", alpha = .3, color = NA) +
  scale_fill_viridis_c()+
  guides(fill = guide_colorbar(barwidth = 1.5, barheight = 10)) +
  ggtitle("Density Map of Arrests")

t <- arrests %>%
  filter(grepl("DWI", arrests$`Primary Charge`, ignore.case = TRUE))
w <-qmplot(longitude, latitude, data = t, maptype='toner-lite',color=I("dark blue"), 
         xlim= c(vec_group_long[1], vec_group_long[11]), 
         ylim= c(vec_group_lat[1], vec_group_lat[11]), f=.8, zoom=12) + 
  geom_hline(yintercept = vec_group_lat, alpha = .5) +
  geom_vline(xintercept = vec_group_long, alpha = .5) +
  ggtitle("DWI Arrests")
t <- arrests %>%
  filter(grepl("OPEN", arrests$`Primary Charge`, ignore.case = TRUE))
x <-qmplot(longitude, latitude, data = t, maptype='toner-lite',color=I("red"), 
         xlim= c(vec_group_long[1], vec_group_long[11]), 
         ylim= c(vec_group_lat[1], vec_group_lat[11]), f=.8, zoom = 12) + 
  geom_hline(yintercept = vec_group_lat, alpha = .5) +
  geom_vline(xintercept = vec_group_long, alpha = .5) +
  ggtitle("Open Container Arrests")
t <- arrests %>%
  filter(grepl("APPEAR", arrests$`Primary Charge`, ignore.case = TRUE))
y <-qmplot(longitude, latitude, data = t, maptype='toner-lite',color=I("orange"), 
         xlim= c(vec_group_long[1], vec_group_long[11]), 
         ylim= c(vec_group_lat[1], vec_group_lat[11]), f=.8, zoom = 12) + 
  geom_hline(yintercept = vec_group_lat, alpha = .5) +
  geom_vline(xintercept = vec_group_long, alpha = .5) +
  ggtitle("Failure to Appear Arrests")
t <- arrests %>%
  filter(grepl("ASSAULT", arrests$`Primary Charge`, ignore.case = TRUE))
z <-qmplot(longitude, latitude, data = t, zoom = 12, maptype='toner-lite',color=I("purple"), 
         xlim= c(vec_group_long[1], vec_group_long[11]), 
         ylim= c(vec_group_lat[1], vec_group_lat[11]), f=.8) + 
  geom_hline(yintercept = vec_group_lat, alpha = .5) +
  geom_vline(xintercept = vec_group_long, alpha = .5) +
  ggtitle("Assault Arrests")

library(ggpubr)
figure <- ggarrange(w, x, y, z,
                    ncol = 2, nrow = 2)
figure

t <- arrests %>%
  filter(grepl("MARIJUANA", arrests$`Primary Charge`, ignore.case = TRUE))
x <- qmplot(longitude, latitude, data = t, maptype='toner-lite',color=I("dark green"), 
         xlim= c(vec_group_long[1], vec_group_long[11]), 
         ylim= c(vec_group_lat[1], vec_group_lat[11]), f=.8) + 
  geom_hline(yintercept = vec_group_lat, alpha=.5) +
  geom_vline(xintercept = vec_group_long, alpha=.5) +
  ggtitle("Marijuana Arrests")
t <- arrests %>%
  filter(grepl("LARCENY", arrests$`Primary Charge`, ignore.case = TRUE))
y <- qmplot(longitude, latitude, data = t, maptype='toner-lite',color=I("sky blue"), 
         xlim= c(vec_group_long[1], vec_group_long[11]), 
         ylim= c(vec_group_lat[1], vec_group_lat[11]), f=.8) + 
  geom_hline(yintercept = vec_group_lat, alpha=.5) +
  geom_vline(xintercept = vec_group_long, alpha=.5) +
  ggtitle("Larceny Arrests")
t <- arrests %>%
  filter(grepl("SHOP", arrests$`Primary Charge`, ignore.case = TRUE))
z <- qmplot(longitude, latitude, data = t, maptype='toner-lite',color=I("magenta"), 
         xlim= c(vec_group_long[1], vec_group_long[11]), 
         ylim= c(vec_group_lat[1], vec_group_lat[11]), f=.8) + 
  geom_hline(yintercept = vec_group_lat, alpha=.5) +
  geom_vline(xintercept = vec_group_long, alpha=.5) + 
  ggtitle("Shoplifting Arrests")

figure <- ggarrange(x, y, z,
                    nrow = 2, ncol = 2)
figure

arrest2 <- arrests  %>%
  select(latitude:longitude) %>%
  mutate(latitude = pi*latitude/180, longitude = pi*longitude/180)

haversine <- function(lat1, lat2, long1, long2){
  # function written for transparency in matching with haversine formula coordinates must be in radians
  h <- sin(.5 * (lat2 - lat1)) ^ 2 + cos(lat1) * cos(lat2) * (sin(.5 * (long2 - long1)) ^ 2)
  # earth diameter taken from http://imagine.gsfc.nasa.gov/features/cosmic/earth_info.html in miles
  r <- (12756 / 2) * 0.621371
  return(2 * r * asin(sqrt(h)))
}

d <- function(x, y){
  haversine(x[1], y[1], x[2], y[2])
}

library(proxy)
#Dmat <- proxy::dist(arrest2, method = d)

library(cluster)
K <- 5
set.seed(1305)
#m <- pam(Dmat, k = K, diss = TRUE)
load("pamobject_arrests.Rdata")
arrest2 <- mutate(arrest2, cluster = factor(m$cluster))
group_by(arrest2, cluster) %>% 
  summarise(n = n(), latitude = mean(latitude),
            longitude = mean(longitude))
## # A tibble: 5 x 4
##   cluster     n latitude longitude
##   <fct>   <int>    <dbl>     <dbl>
## 1 1        3217    0.627     -1.38
## 2 2        5796    0.627     -1.38
## 3 3        3560    0.627     -1.38
## 4 4        1675    0.628     -1.38
## 5 5        1835    0.627     -1.38
library(ggmap)
register_google(key = "AIzaSyAz_F8FVVhZbjfsz4rCz4cEigy6__5cQKc")
x <- mutate(arrest2, latitude = 180*latitude/pi,  longitude = 180*longitude/pi) %>%
  filter(longitude < -78.95 & longitude >-79.11 & latitude > 35.85 & latitude < 35.98) 
  
qmplot(longitude, latitude, data=x, maptype='toner-lite',color=cluster)

a <- qmplot(longitude, latitude, data=arrests, maptype='toner-lite',color=I("red"), zoom =13) + geom_hline(yintercept = vec_group_lat, alpha=.5) +geom_vline(xintercept = vec_group_long, alpha=.5) + ggtitle("Arrests Divided into 100 Sectors")

x2 <- arrests %>%
  group_by(grp.lat, grp.long) %>%
  summarise(count=n())

b <- ggplot(x2, aes(x=grp.long, y=grp.lat, fill = count)) + 
  geom_tile() + 
  scale_fill_viridis_c() + 
  theme_classic() +
  xlim(.5, 10.5) +
  scale_y_continuous(breaks = seq(1, 10, by = 1)) +
  scale_x_continuous( breaks = seq(1, 10, by = 1)) +
  theme(axis.ticks.x = element_blank(),
        axis.ticks.y = element_blank()) + 
  geom_vline(xintercept = seq(.5, 10.5, 1)) +
  geom_hline(yintercept = seq(.5, 10.5, 1)) +
  ggtitle("Arrests Per LatLong Group") +
  xlab("Longitude Group") +
  ylab("Latitude Group")

x2
## # A tibble: 68 x 3
## # Groups:   grp.lat [10]
##    grp.lat grp.long count
##      <int>    <int> <int>
##  1       1        2    16
##  2       1        3     2
##  3       2        2     2
##  4       2        3    13
##  5       2        5     3
##  6       3        2     7
##  7       3        3    99
##  8       3        4    39
##  9       3        5     4
## 10       4        2     5
## # ... with 58 more rows
figure <-ggarrange(a,b,nrow=1, ncol=2, widths = c(15, 25))
figure